Group sequential designs

Simulations of GSD stopping probabilities

First, let us replicate the table of known stopping probabilities for the following group sequential design:

  • Number of analyses: 3
  • Number of patients per group: \(\mathbf{n} = (20, 40, 60)\)
  • Upper bound stopping: \(\mathbf{u} = (2.5, 2, 1.5)\)
  • Lower bound stopping: \(\boldsymbol{\ell} = (0, 0.75, 1.5)\)
  • Number of new patients enrolled per group between consecutive analyses: 20 (so cumulative \(\mathbf{n} = (20, 40, 60)\))
  • Null hypothesis: \(\theta = 0\)
  • Alternative hypothesis: \(\theta = \delta = 0.5\)
  • Known variance: \(\sigma^2 = 1\)

Probabilities are as follows:

| Analysis | Futility (\(\theta = 0\)) | Efficacy (\(\theta = 0\)) | Futility (\(\theta = 0.5\)) | Efficacy (\(\theta = 0.5\)) |
|----------|---------------------------|---------------------------|-----------------------------|-----------------------------|
| 1        | 0.500                     | 0.006                     | 0.057                       | 0.179                       |
| 2        | 0.299                     | 0.019                     | 0.042                       | 0.420                       |
| 3        | 0.137                     | 0.038                     | 0.049                       | 0.253                       |

First, let us simulate the straightforward case of Analysis 1 under the null.

For futility:

# Analysis 1 under the null (theta = 0): simulate the two-sample z statistic
# with 20 patients per group and known variance sigma2.
theta <- 0
sigma2 <- 1

n_sims <- 50000

# preallocate instead of growing the vector one element at a time
z_vec <- numeric(n_sims)

for (i in seq_len(n_sims)) {
  x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
  x2 <- rnorm(20, mean = 0, sd = sqrt(sigma2))

  mean.diff <- mean(x1 - x2)

  # z = (xbar1 - xbar2) / sqrt(2 * sigma2 / n); under H0 this is N(0, 1)
  z <- mean.diff * sqrt(20 / (2 * sigma2))

  z_vec[i] <- z
}

# estimated probability of stopping for futility (z below lower bound 0)
mean(z_vec <= 0)
## [1] 0.49724

For efficacy:

# Re-simulate Analysis 1 under the null for the efficacy boundary.
# Define sigma2 locally so the chunk is self-contained.
sigma2 <- 1

n_sims <- 50000
z_vec <- numeric(n_sims)  # preallocate rather than grow

for (i in seq_len(n_sims)) {
  x1 <- rnorm(20, mean = 0, sd = sqrt(sigma2))
  x2 <- rnorm(20, mean = 0, sd = sqrt(sigma2))

  mean.diff <- mean(x1 - x2)

  z <- mean.diff * sqrt(20 / (2 * sigma2))

  z_vec[i] <- z
}

# estimated probability of stopping for efficacy (z at or above upper bound 2.5)
mean(z_vec >= 2.5)
## [1] 0.00654

Now, Analysis 1 under the alternative.

For futility:

# Analysis 1 under the alternative (theta = delta = 0.5): futility probability.
theta <- 0.5
sigma2 <- 1

n_sims <- 50000
z_vec <- numeric(n_sims)  # preallocate rather than grow

for (i in seq_len(n_sims)) {
  x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
  x2 <- rnorm(20, mean = 0, sd = sqrt(sigma2))

  mean.diff <- mean(x1 - x2)

  # z ~ N(theta * sqrt(n / (2 sigma2)), 1) under the alternative
  z <- mean.diff * sqrt(20 / (2 * sigma2))

  z_vec[i] <- z
}

# estimated probability of stopping for futility (z below lower bound 0)
mean(z_vec <= 0)
## [1] 0.05814

For efficacy:

# Analysis 1 under the alternative (theta = delta = 0.5): efficacy probability.
theta <- 0.5
sigma2 <- 1

n_sims <- 50000
z_vec <- numeric(n_sims)  # preallocate rather than grow

for (i in seq_len(n_sims)) {
  x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
  x2 <- rnorm(20, mean = 0, sd = sqrt(sigma2))

  mean.diff <- mean(x1 - x2)

  z <- mean.diff * sqrt(20 / (2 * sigma2))

  z_vec[i] <- z
}

# estimated probability of stopping for efficacy (z at or above upper bound 2.5)
mean(z_vec >= 2.5)
## [1] 0.1796

Now, the case of all three analyses under the null.

# All three analyses under the null (theta = 0).  Patients are accrued in
# groups of 20 per arm; analysis k uses the cumulative 20 * k per arm.
theta <- 0
sigma2 <- 1

n_sims <- 100000

# preallocate; z2_vec / z3_vec are truncated after the loop to the number of
# simulated trials that actually reached analyses 2 and 3
z_vec <- numeric(n_sims)
z2_vec <- numeric(n_sims)
z3_vec <- numeric(n_sims)

analysis2_count <- 0
analysis3_count <- 0

for (i in seq_len(n_sims)) {
  x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
  x2 <- rnorm(20, mean = 0, sd = sqrt(sigma2))

  mean.diff <- mean(x1 - x2)

  z <- mean.diff * sqrt(20 / (2 * sigma2))

  z_vec[i] <- z

  # continue to analysis 2 only if z is strictly inside (0, 2.5);
  # && (scalar) rather than & for a scalar if-condition
  if ((z > 0) && (z < 2.5)) {

    analysis2_count <- analysis2_count + 1

    # accrue 20 more patients per arm (cumulative n = 40)
    x1 <- c(x1, rnorm(20, mean = theta, sd = sqrt(sigma2)))
    x2 <- c(x2, rnorm(20, mean = 0, sd = sqrt(sigma2)))

    mean.diff <- mean(x1 - x2)

    z2 <- mean.diff * sqrt(40 / (2 * sigma2))

    z2_vec[analysis2_count] <- z2

    # continue to analysis 3 only if z2 is strictly inside (0.75, 2)
    if ((z2 > 0.75) && (z2 < 2)) {

      analysis3_count <- analysis3_count + 1

      # accrue 20 more patients per arm (cumulative n = 60)
      x1 <- c(x1, rnorm(20, mean = theta, sd = sqrt(sigma2)))
      x2 <- c(x2, rnorm(20, mean = 0, sd = sqrt(sigma2)))

      mean.diff <- mean(x1 - x2)

      z3 <- mean.diff * sqrt(60 / (2 * sigma2))

      z3_vec[analysis3_count] <- z3
    }
  }
}

# drop the unused preallocated tails
z2_vec <- z2_vec[seq_len(analysis2_count)]
z3_vec <- z3_vec[seq_len(analysis3_count)]

For futility:

# P(stop for futility at analysis 1 | H0): z below lower bound 0
mean(z_vec <= 0)
## [1] 0.50215
# P(continue past analysis 1) * P(z2 below lower bound 0.75 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec <= 0.75) 
## [1] 0.29878
# P(continue past analyses 1 and 2) * P(z3 below final bound 1.5 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec < 1.5)
## [1] 0.13591

For efficacy:

# P(stop for efficacy at analysis 1 | H0): z above upper bound 2.5
mean(z_vec > 2.5)
## [1] 0.00613
# P(continue past analysis 1) * P(z2 above upper bound 2 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 2) 
## [1] 0.01941
# P(continue past analyses 1 and 2) * P(z3 above final bound 1.5 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec > 1.5)
## [1] 0.03762

Now, the case of all three analyses under the alternative.

# All three analyses under the alternative (theta = delta = 0.5).
theta <- 0.5
sigma2 <- 1

n_sims <- 100000

# preallocate; z2_vec / z3_vec are truncated after the loop to the number of
# simulated trials that actually reached analyses 2 and 3
z_vec <- numeric(n_sims)
z2_vec <- numeric(n_sims)
z3_vec <- numeric(n_sims)

analysis2_count <- 0
analysis3_count <- 0

for (i in seq_len(n_sims)) {
  x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
  x2 <- rnorm(20, mean = 0, sd = sqrt(sigma2))

  mean.diff <- mean(x1 - x2)

  z <- mean.diff * sqrt(20 / (2 * sigma2))

  z_vec[i] <- z

  # continue to analysis 2 only if z is strictly inside (0, 2.5)
  if ((z > 0) && (z < 2.5)) {

    analysis2_count <- analysis2_count + 1

    # accrue 20 more patients per arm (cumulative n = 40)
    x1 <- c(x1, rnorm(20, mean = theta, sd = sqrt(sigma2)))
    x2 <- c(x2, rnorm(20, mean = 0, sd = sqrt(sigma2)))

    mean.diff <- mean(x1 - x2)

    z2 <- mean.diff * sqrt(40 / (2 * sigma2))

    z2_vec[analysis2_count] <- z2

    # continue to analysis 3 only if z2 is strictly inside (0.75, 2)
    if ((z2 > 0.75) && (z2 < 2)) {

      analysis3_count <- analysis3_count + 1

      # accrue 20 more patients per arm (cumulative n = 60)
      x1 <- c(x1, rnorm(20, mean = theta, sd = sqrt(sigma2)))
      x2 <- c(x2, rnorm(20, mean = 0, sd = sqrt(sigma2)))

      mean.diff <- mean(x1 - x2)

      z3 <- mean.diff * sqrt(60 / (2 * sigma2))

      z3_vec[analysis3_count] <- z3
    }
  }
}

# drop the unused preallocated tails
z2_vec <- z2_vec[seq_len(analysis2_count)]
z3_vec <- z3_vec[seq_len(analysis3_count)]

For futility:

# P(stop for futility at analysis 1 | H1): z below lower bound 0
mean(z_vec <= 0)
## [1] 0.05693
# P(continue past analysis 1) * P(z2 below lower bound 0.75 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec <= 0.75) 
## [1] 0.04187
# P(continue past analyses 1 and 2) * P(z3 at or below final bound 1.5 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec <= 1.5)
## [1] 0.04969

For efficacy:

# P(stop for efficacy at analysis 1 | H1): z above upper bound 2.5
mean(z_vec > 2.5)
## [1] 0.17691
# P(continue past analysis 1) * P(z2 at or above upper bound 2 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec >= 2) 
## [1] 0.42091
# P(continue past analyses 1 and 2) * P(z3 at or above final bound 1.5 | continued)
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec >= 1.5)
## [1] 0.25369

Function for GSDs

In order to generate a function, we need the following information:

  • Number of analyses
  • Upper bound stopping vector \(\mathbf{u}\)
  • Lower bound stopping vector \(\mathbf{l}\)
  • Number of patients at each analysis as a vector \(\mathbf{n}\)
  • Null hypothesis value for \(\theta\)
  • Alternative hypothesis value for \(\delta\)
  • Known variance: \(\sigma^2\)

The length of the upper and lower bound vectors should be equal to the number of analyses that are planned. The length of the number of patients at each analysis vector should also be equal to the number of analyses that are planned.

gsd_simulations <- function(n_analyses = 3, 
                            upper_bounds = c(2.5, 2, 1.5),
                            lower_bounds = c(0, 0.75, 1.5),
                            n_patients = c(20, 40, 60),
                            null_hypothesis = 0,
                            alt_hypothesis = 0.5,
                            variance = 1) {
  # Analytic stopping probabilities for a group sequential design (GSD).
  #
  # Arguments:
  #   n_analyses      - number of planned analyses K
  #   upper_bounds    - efficacy (upper) stopping bounds u, length K
  #   lower_bounds    - futility (lower) stopping bounds l, length K
  #   n_patients      - cumulative patients per group at each analysis, length K
  #   null_hypothesis - value of theta under the null
  #   alt_hypothesis  - value of delta under the alternative
  #   variance        - known outcome variance sigma^2
  #
  # Value: a list with one element per analysis; each element is a named
  #   vector of the probabilities of stopping for futility/efficacy under
  #   the null and the alternative.
  #
  # NOTE: analyses after the first require mvtnorm::pmvnorm() to be
  # available in the calling environment.

  # sanity checks (warnings, not prints, so callers can suppress/trap them)
  if (length(upper_bounds) != n_analyses)
    warning("number of upper bounds must equal number of analyses")
  if (length(lower_bounds) != n_analyses)
    warning("number of lower bounds must equal number of analyses")
  if (length(n_patients) != n_analyses)
    warning("number of patients vector must equal number of analyses")
  if (length(upper_bounds) != length(lower_bounds))
    warning("number of upper bounds must equal number of lower bounds")

  # assign values for null and alt hypotheses
  theta_0 <- null_hypothesis
  delta <- alt_hypothesis

  # the bounds vector is authoritative for the number of analyses
  n_analyses <- length(upper_bounds)

  # non-centrality means of the z statistics under null and alternative
  mean_0 <- numeric(n_analyses)
  mean_1 <- numeric(n_analyses)

  # integration limits for pmvnorm: stopping at analysis i requires
  # continuation (l_k < Z_k < u_k) at every earlier analysis k < i
  futility_l_bounds <- list()
  futility_u_bounds <- list()
  efficacy_l_bounds <- list()
  efficacy_u_bounds <- list()

  for (i in seq_len(n_analyses)) {

    # special case of i = 1: univariate bounds
    if (i == 1) {
      futility_l_bounds[[i]] <- lower_bounds[i]
      futility_u_bounds[[i]] <- upper_bounds[i]
      efficacy_l_bounds[[i]] <- lower_bounds[i]
      efficacy_u_bounds[[i]] <- upper_bounds[i]
      next
    }

    # indices of the earlier (continuation) analyses; seq_len(i - 1) is the
    # explicit form of what `1:i-1` only achieved by the index-0-drop accident
    prev <- seq_len(i - 1)

    # futility: continue through i-1, then Z_i in (-Inf, l_i)
    futility_l_bounds[[i]] <- c(lower_bounds[prev], -Inf)
    futility_u_bounds[[i]] <- c(upper_bounds[prev], lower_bounds[i])

    # efficacy: continue through i-1, then Z_i in (u_i, Inf)
    efficacy_l_bounds[[i]] <- c(lower_bounds[prev], upper_bounds[i])
    efficacy_u_bounds[[i]] <- c(upper_bounds[prev], Inf)
  }

  # list of probabilities to return
  probs_to_return <- list()

  # correlation matrices of (Z_1, ..., Z_i); corr(Z_r, Z_s) = sqrt(n_r / n_s)
  # for n_r <= n_s (information-fraction correlation)
  SIGMA_list <- list()

  for (i in seq_len(n_analyses)) {
    if (i == 1) next

    SIGMA <- diag(nrow = i)

    # fill the off-diagonal entries; use r/s so we do not shadow the outer i
    for (r in seq_len(i)) {
      for (s in seq_len(i)) {
        if (r == s) next
        SIGMA[r, s] <- sqrt(min(n_patients[r], n_patients[s]) /
                              max(n_patients[r], n_patients[s]))
      }
    }

    SIGMA_list[[i]] <- SIGMA
  }

  for (i in seq_len(n_analyses)) {

    # mean of Z_i is theta * sqrt(n_i / (2 sigma^2)); the standardized z
    # statistic itself always has unit variance regardless of `variance`
    mean_0[i] <- theta_0 * sqrt(n_patients[i] / (2 * variance))
    mean_1[i] <- delta * sqrt(n_patients[i] / (2 * variance))

    if (i == 1) {
      # univariate probabilities; sd = 1 because Z_1 is standardized
      # (the previous sd = sqrt(variance) was wrong whenever variance != 1)
      futility_null <- pnorm(futility_l_bounds[[i]], mean = mean_0[i], sd = 1)
      efficacy_null <- pnorm(efficacy_u_bounds[[i]], mean = mean_0[i], sd = 1,
                             lower.tail = FALSE)
      futility_alt <- pnorm(futility_l_bounds[[i]], mean = mean_1[i], sd = 1)
      efficacy_alt <- pnorm(efficacy_u_bounds[[i]], mean = mean_1[i], sd = 1,
                            lower.tail = FALSE)
    } else {
      # multivariate normal probability over the continuation region
      futility_null <- pmvnorm(lower = futility_l_bounds[[i]],
                               upper = futility_u_bounds[[i]],
                               mean = mean_0[seq_len(i)],
                               corr = SIGMA_list[[i]])
      futility_alt <- pmvnorm(lower = futility_l_bounds[[i]],
                              upper = futility_u_bounds[[i]],
                              mean = mean_1[seq_len(i)],
                              corr = SIGMA_list[[i]])
      efficacy_null <- pmvnorm(lower = efficacy_l_bounds[[i]],
                               upper = efficacy_u_bounds[[i]],
                               mean = mean_0[seq_len(i)],
                               corr = SIGMA_list[[i]])
      efficacy_alt <- pmvnorm(lower = efficacy_l_bounds[[i]],
                              upper = efficacy_u_bounds[[i]],
                              mean = mean_1[seq_len(i)],
                              corr = SIGMA_list[[i]])
    }

    probs_to_return[[i]] <- c(futility_null, efficacy_null,
                              futility_alt, efficacy_alt)
    names(probs_to_return[[i]]) <- c("futility_null", "efficacy_null",
                                     "futility_alt", "efficacy_alt")
  }

  # return the probabilities
  probs_to_return
}
# Evaluate the design from the table above (these arguments equal the
# function defaults); results should match the Monte Carlo estimates
gsd_simulations(n_analyses = 3,
                upper_bounds = c(2.5, 2, 1.5),
                lower_bounds = c(0, 0.75, 1.5),
                n_patients = c(20, 40, 60),
                null_hypothesis = 0,
                alt_hypothesis = 0.5,
                variance = 1)
## [[1]]
## futility_null efficacy_null  futility_alt  efficacy_alt 
##   0.500000000   0.006209665   0.056923149   0.179084096 
## 
## [[2]]
## futility_null efficacy_null  futility_alt  efficacy_alt 
##    0.29877595    0.01941484    0.04232328    0.41968371 
## 
## [[3]]
## futility_null efficacy_null  futility_alt  efficacy_alt 
##    0.13728762    0.03831221    0.04902569    0.25295399

Calculate expected sample size

The expected sample size is calculated across a set of possible true treatment effects \(\boldsymbol{\delta} = \{\delta_1, \delta_2, \dots, \delta_n \}\) as a function of the design elements (1) number of analyses \(k = \{1, 2, \dots, K\}\), (2) number of patients at each analysis \(\mathbf{n} = \{n_1, n_2, \dots, n_K\}\), (3) the upper and lower bounds \(\mathbf{u} = (u_1, u_2, \dots, u_K)\) and \(\boldsymbol{\ell} = (\ell_1, \ell_2, \dots, \ell_K)\).

The expected sample size is:

\[ E[N \, | \, \delta]=\sum_{i=1}^K n_i P(\text{trial stops after analysis }i \, | \, \delta) \]

The probability that the trial stops after analysis \(i\) is the sum of the probabilities that the trial stops for efficacy after analysis \(i\) and the probability that the trial stops for futility after analysis \(i\).

\[ \begin{multline*} E[N \, | \, \delta]=\sum_{i=1}^K n_i \Big[ P(\text{trial stops for efficacy after analysis }i \, | \, \delta) + \\ P(\text{trial stops for futility after analysis }i \, | \, \delta) \Big] \end{multline*} \]

Starting with the design given in gsd_simulations() function default, we can calculate the expected sample size under \(\delta=0\) to start.

# Evaluate the design with the "alternative" set to delta = 0, so the
# alt probabilities equal the null probabilities
probs_to_return <- gsd_simulations(alt_hypothesis = 0)

This loop then pulls the probabilities computed under the alternative hypothesis, where the alternative was set to \(\delta = 0\) in the function call above.

# example function inputs (use <- for assignment, not =)
n_patients <- c(20, 40, 60)
n_analyses <- 3

# P(stop at analysis i | delta) = P(stop futility) + P(stop efficacy);
# these are elements 3:4 (the "alt" entries) of each analysis's vector.
# vapply replaces the grow-in-a-loop pattern and pins the result type.
sum_probs <- vapply(
  seq_len(n_analyses),
  function(i) sum(probs_to_return[[i]][3:4]),
  numeric(1)
)

# expected sample size: E[N | delta] = sum_i n_i * P(stop at analysis i)
ess <- sum(n_patients * sum_probs)

We can add this loop to the function gsd_simulations() and then we can graph the expected sample size over a vector of alternative hypotheses \(\boldsymbol{\delta}\).

gsd_simulations <- function(n_analyses = 3, 
                            upper_bounds = c(2.5, 2, 1.5),
                            lower_bounds = c(0, 0.75, 1.5),
                            n_patients = c(20, 40, 60),
                            null_hypothesis = 0,
                            alt_hypothesis = 0.5,
                            variance = 1) {
  # Analytic stopping probabilities and expected sample size for a GSD.
  #
  # Arguments:
  #   n_analyses      - number of planned analyses K
  #   upper_bounds    - efficacy (upper) stopping bounds u, length K
  #   lower_bounds    - futility (lower) stopping bounds l, length K
  #   n_patients      - cumulative patients per group at each analysis, length K
  #   null_hypothesis - value of theta under the null
  #   alt_hypothesis  - value of delta under the alternative
  #   variance        - known outcome variance sigma^2
  #
  # Value: a named list "analysis_1", ..., "analysis_K" of stopping
  #   probability vectors, plus "expected_sample_size" computed under the
  #   alternative hypothesis.
  #
  # NOTE: analyses after the first require mvtnorm::pmvnorm() to be
  # available in the calling environment.

  # sanity checks (warnings, not prints, so callers can suppress/trap them)
  if (length(upper_bounds) != n_analyses)
    warning("number of upper bounds must equal number of analyses")
  if (length(lower_bounds) != n_analyses)
    warning("number of lower bounds must equal number of analyses")
  if (length(n_patients) != n_analyses)
    warning("number of patients vector must equal number of analyses")
  if (length(upper_bounds) != length(lower_bounds))
    warning("number of upper bounds must equal number of lower bounds")

  # assign values for null and alt hypotheses
  theta_0 <- null_hypothesis
  delta <- alt_hypothesis

  # the bounds vector is authoritative for the number of analyses
  n_analyses <- length(upper_bounds)

  # non-centrality means of the z statistics under null and alternative
  mean_0 <- numeric(n_analyses)
  mean_1 <- numeric(n_analyses)

  # integration limits for pmvnorm: stopping at analysis i requires
  # continuation (l_k < Z_k < u_k) at every earlier analysis k < i
  futility_l_bounds <- list()
  futility_u_bounds <- list()
  efficacy_l_bounds <- list()
  efficacy_u_bounds <- list()

  for (i in seq_len(n_analyses)) {

    # special case of i = 1: univariate bounds
    if (i == 1) {
      futility_l_bounds[[i]] <- lower_bounds[i]
      futility_u_bounds[[i]] <- upper_bounds[i]
      efficacy_l_bounds[[i]] <- lower_bounds[i]
      efficacy_u_bounds[[i]] <- upper_bounds[i]
      next
    }

    # indices of the earlier (continuation) analyses; seq_len(i - 1) is the
    # explicit form of what `1:i-1` only achieved by the index-0-drop accident
    prev <- seq_len(i - 1)

    # futility: continue through i-1, then Z_i in (-Inf, l_i)
    futility_l_bounds[[i]] <- c(lower_bounds[prev], -Inf)
    futility_u_bounds[[i]] <- c(upper_bounds[prev], lower_bounds[i])

    # efficacy: continue through i-1, then Z_i in (u_i, Inf)
    efficacy_l_bounds[[i]] <- c(lower_bounds[prev], upper_bounds[i])
    efficacy_u_bounds[[i]] <- c(upper_bounds[prev], Inf)
  }

  # list of probabilities to return
  probs_to_return <- list()

  # correlation matrices of (Z_1, ..., Z_i); corr(Z_r, Z_s) = sqrt(n_r / n_s)
  # for n_r <= n_s (information-fraction correlation)
  SIGMA_list <- list()

  for (i in seq_len(n_analyses)) {
    if (i == 1) next

    SIGMA <- diag(nrow = i)

    # fill the off-diagonal entries; use r/s so we do not shadow the outer i
    for (r in seq_len(i)) {
      for (s in seq_len(i)) {
        if (r == s) next
        SIGMA[r, s] <- sqrt(min(n_patients[r], n_patients[s]) /
                              max(n_patients[r], n_patients[s]))
      }
    }

    SIGMA_list[[i]] <- SIGMA
  }

  for (i in seq_len(n_analyses)) {

    # mean of Z_i is theta * sqrt(n_i / (2 sigma^2)); the standardized z
    # statistic itself always has unit variance regardless of `variance`
    mean_0[i] <- theta_0 * sqrt(n_patients[i] / (2 * variance))
    mean_1[i] <- delta * sqrt(n_patients[i] / (2 * variance))

    if (i == 1) {
      # univariate probabilities; sd = 1 because Z_1 is standardized
      # (the previous sd = sqrt(variance) was wrong whenever variance != 1)
      futility_null <- pnorm(futility_l_bounds[[i]], mean = mean_0[i], sd = 1)
      efficacy_null <- pnorm(efficacy_u_bounds[[i]], mean = mean_0[i], sd = 1,
                             lower.tail = FALSE)
      futility_alt <- pnorm(futility_l_bounds[[i]], mean = mean_1[i], sd = 1)
      efficacy_alt <- pnorm(efficacy_u_bounds[[i]], mean = mean_1[i], sd = 1,
                            lower.tail = FALSE)
    } else {
      # multivariate normal probability over the continuation region
      futility_null <- pmvnorm(lower = futility_l_bounds[[i]],
                               upper = futility_u_bounds[[i]],
                               mean = mean_0[seq_len(i)],
                               corr = SIGMA_list[[i]])
      futility_alt <- pmvnorm(lower = futility_l_bounds[[i]],
                              upper = futility_u_bounds[[i]],
                              mean = mean_1[seq_len(i)],
                              corr = SIGMA_list[[i]])
      efficacy_null <- pmvnorm(lower = efficacy_l_bounds[[i]],
                               upper = efficacy_u_bounds[[i]],
                               mean = mean_0[seq_len(i)],
                               corr = SIGMA_list[[i]])
      efficacy_alt <- pmvnorm(lower = efficacy_l_bounds[[i]],
                              upper = efficacy_u_bounds[[i]],
                              mean = mean_1[seq_len(i)],
                              corr = SIGMA_list[[i]])
    }

    probs_to_return[[i]] <- c(futility_null, efficacy_null,
                              futility_alt, efficacy_alt)
    names(probs_to_return[[i]]) <- c("futility_null", "efficacy_null",
                                     "futility_alt", "efficacy_alt")
  }

  # P(stop at analysis i | delta): elements 3:4 are the probabilities
  # under the alternative
  sum_probs <- vapply(
    probs_to_return,
    function(p) sum(p[3:4]),
    numeric(1)
  )

  # expected sample size: E[N | delta] = sum_i n_i * P(stop at analysis i)
  ess <- sum(n_patients * sum_probs)

  # append the ESS and name the list elements
  return_values <- append(probs_to_return, ess)
  names(return_values) <- c(paste0("analysis_", seq_len(n_analyses)),
                            "expected_sample_size")

  # return probabilities and ESS
  return_values
}

Now we can test the function:

# With alt_hypothesis = 0 the "alt" probabilities equal the null ones,
# so the ESS below is the expected sample size under the null
gsd_simulations(alt_hypothesis = 0)
## $analysis_1
## futility_null efficacy_null  futility_alt  efficacy_alt 
##   0.500000000   0.006209665   0.500000000   0.006209665 
## 
## $analysis_2
## futility_null efficacy_null  futility_alt  efficacy_alt 
##    0.29877595    0.01941484    0.29877595    0.01941484 
## 
## $analysis_3
## futility_null efficacy_null  futility_alt  efficacy_alt 
##    0.13728759    0.03831225    0.13728618    0.03830697 
## 
## $expected_sample_size
## [1] 33.38741

Finally, we can calculate the ESS over a range of values:

# Sweep the true effect size and record the expected sample size at each value
delta_vector <- seq(from = 0, to = 1, length.out = 100)

# vapply replaces the grow-in-a-loop pattern and avoids the magic 1:100
ess_vector <- vapply(
  delta_vector,
  function(d) gsd_simulations(alt_hypothesis = d)$expected_sample_size,
  numeric(1)
)

Then, we can graph this:

# Plot expected sample size as a function of the true effect size
ggplot(data.frame(delta = delta_vector, ess = ess_vector),
       aes(x = delta, y = ess)) +
  geom_line() +
  labs(x = "True \u03B4", y = "Expected sample size", 
       title = "Expected sample size over range of true \u03B4 values")

Gaussian process regression

# load required libraries
library(gplite)
## This is gplite version 0.13.0

An unoptimized GP regression model (raw)

Generate some data for the function to which the model will be fit.

# Five equally spaced training inputs on [-2, 2] with targets y = x^2
n <- 5
x <- matrix(seq(-2, 2, length.out = n), nrow = n)
y <- x^2

Plot the data.

# Scatter plot of the five training points
ggplot() +
  geom_point(mapping = aes(x = x, y = y), size = 2) +
  labs(x = 'x', y = 'y')

Initialize the GP model.

# Specify the GP model we want to use:
# (hyperparameters are fixed here; the fit below conditions on the data
# without optimizing them)
gp_empty <- gp_init(
  # A squared exponential (aka Gaussian aka RBF) kernel
  cfs = cf_sexp(
    vars = NULL,
    lscale = 0.3,   # length-scale hyperparameter
    magn = 1,       # magnitude (signal strength) hyperparameter
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),  
  
  # Assume Gaussian distributed errors
  lik = lik_gaussian(
    sigma = 0.5,    # noise level hyperparameter
    prior_sigma = prior_logunif()
  ), 
  
  # Use the full covariance (i.e., do not approximate)
  method = method_full() 
)

In order to plot the negative log marginal likelihood for different hyperparameters combinations, we will need to fit the GP model without optimizing (i.e., without using gp_optim).

# Condition on the data with the hyperparameters left at their initial values
gp_raw <- gp_fit(gp_empty, x, y)

Plot the model with the raw values.

# compute the predictive mean and variance in a grid of points
xt   <- seq(-4, 4, length.out = 150)   # spell out length.out (avoid partial matching)
pred <- gp_pred(gp_raw, xt, var = TRUE)  # TRUE, not T (T is reassignable)

# visualize: predictive mean +/- 2 predictive standard deviations
mu <- pred$mean
lb <- pred$mean - 2 * sqrt(pred$var)
ub <- pred$mean + 2 * sqrt(pred$var)

ggplot() + 
  geom_ribbon(aes(x = xt, ymin = lb, ymax = ub), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')

Pull the negative log marginal likelihood.

# Negative log marginal likelihood of the unoptimized fit (lower is better)
gp_energy(gp_raw)
## [1] 18.73287

An optimized GP model

If the GP model is optimized, the negative log marginal likelihood should improve.

# Now fit the model to the data, optimizing the hyperparameters:
gp_optimized <- gp_optim(gp_empty, x, y, verbose = FALSE)

# compute the predictive mean and variance in a grid of points
xt   <- seq(-4, 4, length.out = 150)   # spell out length.out (avoid partial matching)
pred <- gp_pred(gp_optimized, xt, var = TRUE)  # TRUE, not T (T is reassignable)

# visualize: predictive mean +/- 2 predictive standard deviations
mu <- pred$mean
lb <- pred$mean - 2 * sqrt(pred$var)
ub <- pred$mean + 2 * sqrt(pred$var)

ggplot() + 
  geom_ribbon(aes(x = xt, ymin = lb, ymax = ub), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')

# negative log marginal likelihood after optimization (should be lower)
gp_energy(gp_optimized)
## [1] 11.887

Marginal likelihood surface plots

We can also plot the surface.

Entire surface:

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# 3-D surface of the negative log marginal likelihood over the
# (length-scale, signal variance) grid
fig <- plot_ly() %>% 
  
  add_trace(
    data = surface_plot,
    # NOTE(review): assumes surface_plot comes from a 79 x 79 hyperparameter
    # grid; the grid constructed later in this document is 51 x 121, so
    # confirm which run produced this object
    x = unique(surface_plot$ell), 
    y = unique(surface_plot$sigma_f), 
    z = matrix(surface_plot$energy, nrow = 79, ncol = 79, byrow = TRUE),
    type = "surface") %>%
  
  layout(
    scene = list(
      xaxis = list(
        title = "length-scale parameter"
      ),
      yaxis = list(
        title = "signal variance"
      ),
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )

fig

Surface limited to lower values of negative log marginal likelihood:

library(plotly)

# Zoomed surface: restrict to the region where the energy is lowest
fig <- plot_ly() %>% 
  
  add_trace(
    data = surface_plot,
    # NOTE(review): index ranges assume the 79 x 79 grid — TODO confirm
    x = unique(surface_plot$ell)[1:30], 
    y = unique(surface_plot$sigma_f)[50:79], 
    z = matrix(surface_plot$energy, nrow = 79, ncol = 79, byrow = TRUE)[50:79, 1:30],
    type = "surface") %>%
  
  # mark the best grid point found on this surface
  add_trace(
    x = 1.45,
    y = 4,
    z = 11.07394,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  
  layout(
    scene = list(
      aspectmode = 'manual',  # Set to manual for custom aspect ratio
      aspectratio = list(x = 1, y = 1, z = 0.5),  # Adjust x, y, z ratios
      
      xaxis = list(
        title = "length-scale parameter"
      ),
      
      yaxis = list(
        title = "signal variance"
      ),
      
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )

fig

Looking at the above plot, it appears that values for sigma_f that are greater than 4 may improve the negative log marginal likelihood further. We can assess this with new GP fits.

# candidate hyperparameter values
ell <- seq(0.5, 3, by = 0.05)      # length-scale grid (51 values)
  
sigma_f <- seq(2, 8, by = 0.05)    # signal-magnitude grid (121 values)

hyperparams <- expand.grid(
  ell = ell,
  sigma_f = sigma_f
)

# preallocate the energy vector instead of growing it inside the loop
energy <- numeric(nrow(hyperparams))

# fit one unoptimized GP per hyperparameter combination (6000+ models)
for (i in seq_len(nrow(hyperparams))) {
  
  gp_empty <- gp_init(
    # A squared exponential (aka Gaussian aka RBF) kernel
    cfs = cf_sexp(
      vars = NULL,
      lscale = hyperparams$ell[i],
      magn = hyperparams$sigma_f[i],
      prior_lscale = prior_logunif(),
      prior_magn = prior_logunif(),
      normalize = FALSE
    ),  
    
    # Assume Gaussian distributed errors with a fixed small noise level
    lik = lik_gaussian(
      sigma = 0.01, 
      prior_sigma = prior_logunif()
    ), 
    
    # Use the full covariance (i.e., do not approximate)
    method = method_full() 
  )
  
  gp_raw <- gp_fit(gp_empty, x, y)
  
  energy[i] <- gp_energy(gp_raw)
}

surface_plot <- cbind(hyperparams, energy)

# grid point minimizing the negative log marginal likelihood
surface_plot[which.min(surface_plot$energy), ]
##       ell sigma_f   energy
## 4212 1.95     6.1 11.05812

Plot this new run.

# Surface for the 51 x 121 re-run; note the axes are swapped relative to
# the earlier figures (x is now signal variance, y is length-scale)
fig <- plot_ly() %>% 
  
  add_trace(
    x = unique(surface_plot$sigma_f)[80:121], 
    y = unique(surface_plot$ell)[10:51], 
    z = matrix(surface_plot$energy, nrow = 51, ncol = 121)[10:51, 80:121],
    type = "surface") %>% 
  
  # mark the minimizing grid point (ell = 1.95, sigma_f = 6.1)
  add_trace(
    x = 6.1,
    y = 1.95,
    z = 11.05812,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  
  layout(
    scene = list(
      aspectmode = 'manual',  # Set to manual for custom aspect ratio
      aspectratio = list(x = 1, y = 1, z = 0.5),  # Adjust x, y, z ratios
      
      xaxis = list(
        title = "signal variance"
      ),
      
      yaxis = list(
        title = "length-scale parameter"
      ),
      
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )
  
fig

Now that we have explored these surfaces, we can review how the selections of each of these surfaces affects the fitting of the GP regression model.

Modifying length-scale

Model with \(\ell = 2\) and \(\sigma_f^2 = 6\), near the optimal values. Recall that we are setting the noise parameter to \(\sigma_n = 0.01\) in lik_gaussian().

# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (length-scale 2 and magnitude 6, near the grid-search optimum)
gp_empty <- gp_init(
  # A squared exponential (aka Gaussian aka RBF) kernel
  cfs = cf_sexp(
    vars = NULL,
    lscale = 2,
    magn = 6,
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),  
  
  # Assume Gaussian distributed errors
  lik = lik_gaussian(
    sigma = 0.01, 
    prior_sigma = prior_logunif()
  ), 
  
  # Use the full covariance (i.e., do not approximate)
  method = method_full() 
)

# Fit without optimizing the hyperparameters
gp_raw_ell2sigma6 <- gp_fit(gp_empty, x, y)

Plot the model.

# compute the predictive mean and variance in a grid of points
xt   <- seq(-4, 4, length.out = 150)   # spell out length.out (avoid partial matching)
pred_ell2sigma6 <- gp_pred(gp_raw_ell2sigma6, xt, var = TRUE)  # TRUE, not T

# visualize: predictive mean +/- 2 predictive standard deviations
mu_ell2sigma6 <- pred_ell2sigma6$mean
lb_ell2sigma6 <- pred_ell2sigma6$mean - 2 * sqrt(pred_ell2sigma6$var)
ub_ell2sigma6 <- pred_ell2sigma6$mean + 2 * sqrt(pred_ell2sigma6$var)

ggplot() + 
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')

Model with \(\ell = 1\) and \(\sigma_f^2 = 6\):

# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (length-scale reduced to 1, magnitude kept at 6)
gp_empty <- gp_init(
  # A squared exponential (aka Gaussian aka RBF) kernel
  cfs = cf_sexp(
    vars = NULL,
    lscale = 1,
    magn = 6,
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),  
  
  # Assume Gaussian distributed errors
  lik = lik_gaussian(
    sigma = 0.01, 
    prior_sigma = prior_logunif()
  ), 
  
  # Use the full covariance (i.e., do not approximate)
  method = method_full() 
)

# Fit without optimizing the hyperparameters
gp_raw_ell1sigma6 <- gp_fit(gp_empty, x, y)

Plot the model.

# compute the predictive mean and variance in a grid of points
pred_ell1sigma6 <- gp_pred(gp_raw_ell1sigma6, xt, var = TRUE)  # TRUE, not T

# visualize: overlay the ell = 1 fit (orange) on the ell = 2 fit (gray)
mu_ell1sigma6 <- pred_ell1sigma6$mean
lb_ell1sigma6 <- pred_ell1sigma6$mean - 2 * sqrt(pred_ell1sigma6$var)
ub_ell1sigma6 <- pred_ell1sigma6$mean + 2 * sqrt(pred_ell1sigma6$var)

ggplot() + 
  geom_ribbon(aes(x = xt, ymin = lb_ell1sigma6, ymax = ub_ell1sigma6), fill = 'orange', alpha = 0.3) +
  geom_line(aes(x = xt, y = mu_ell1sigma6), linewidth = 0.5, color = "orange") + 
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6), fill = 'lightgray', alpha = 0.6) +
  geom_line(aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')

Model with \(\ell = 0.1\) and \(\sigma_f^2 = 6\):

# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (length-scale reduced further to 0.1, magnitude kept at 6)
gp_empty <- gp_init(
  # A squared exponential (aka Gaussian aka RBF) kernel
  cfs = cf_sexp(
    vars = NULL,
    lscale = 0.1,
    magn = 6,
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),  
  
  # Assume Gaussian distributed errors
  lik = lik_gaussian(
    sigma = 0.01, 
    prior_sigma = prior_logunif()
  ), 
  
  # Use the full covariance (i.e., do not approximate)
  method = method_full() 
)

# Fit without optimizing the hyperparameters
gp_raw_ell01sigma6 <- gp_fit(gp_empty, x, y)

Plot the model.

# compute the predictive mean and variance in a grid of points
pred_ell01sigma6 <- gp_pred(gp_raw_ell01sigma6, xt, var = TRUE)  # TRUE, not T

# visualize: overlay all three length-scale fits (0.1 purple, 1 orange, 2 gray)
mu_ell01sigma6 <- pred_ell01sigma6$mean
lb_ell01sigma6 <- pred_ell01sigma6$mean - 2 * sqrt(pred_ell01sigma6$var)
ub_ell01sigma6 <- pred_ell01sigma6$mean + 2 * sqrt(pred_ell01sigma6$var)

ggplot() + 
  geom_ribbon(aes(x = xt, ymin = lb_ell01sigma6, ymax = ub_ell01sigma6), fill = 'purple', alpha = 0.3) +
  geom_line(aes(x = xt, y = mu_ell01sigma6), linewidth = 0.5, color = "purple") +
  geom_ribbon(aes(x = xt, ymin = lb_ell1sigma6, ymax = ub_ell1sigma6), fill = 'orange', alpha = 0.3) +
  geom_line(aes(x = xt, y = mu_ell1sigma6), linewidth = 0.5, color = "orange") + 
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6), fill = 'lightgray', alpha = 0.6) +
  geom_line(aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')

Modifying signal variance

Plot the “near optimal” model.

# Re-plot the near-optimal (lscale = 2, magn = 6) fit on its own
ggplot() +
  geom_ribbon(mapping = aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6),
              fill = 'lightgray', alpha = 0.6) +
  geom_line(mapping = aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(mapping = aes(x = x, y = y), size = 2) +
  labs(x = 'x', y = 'y')

Model with \(\ell = 2\) and \(\sigma_f^2 = 0.1\):

# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (length-scale kept at 2, magnitude reduced to 0.1)
gp_empty <- gp_init(
  # A squared exponential (aka Gaussian aka RBF) kernel
  cfs = cf_sexp(
    vars = NULL,
    lscale = 2,
    magn = 0.1,
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),  
  
  # Assume Gaussian distributed errors
  lik = lik_gaussian(
    sigma = 0.01, 
    prior_sigma = prior_logunif()
  ), 
  
  # Use the full covariance (i.e., do not approximate)
  method = method_full() 
)

# Fit without optimizing the hyperparameters
gp_raw_ell2sigma01 <- gp_fit(gp_empty, x, y)

Plot the model.

# compute the predictive mean and variance in a grid of points
pred_ell2sigma01 <- gp_pred(gp_raw_ell2sigma01, xt, var = TRUE)  # TRUE, not T

# visualize: overlay the small-magnitude fit (orange) on the near-optimal fit (gray)
mu_ell2sigma01 <- pred_ell2sigma01$mean
lb_ell2sigma01 <- pred_ell2sigma01$mean - 2 * sqrt(pred_ell2sigma01$var)
ub_ell2sigma01 <- pred_ell2sigma01$mean + 2 * sqrt(pred_ell2sigma01$var)

ggplot() + 
  # BUG FIX: ymax previously reused lb_ell2sigma01, collapsing the ribbon
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma01, ymax = ub_ell2sigma01), fill = 'orange', alpha = 0.3) +
  geom_line(aes(x = xt, y = mu_ell2sigma01), linewidth = 0.5, color = "orange") +
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6), fill = 'lightgray', alpha = 0.6) +
  geom_line(aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')

Model with \(\ell = 2\) and \(\sigma_f^2 = 0.03\):

# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (length-scale kept at 2, magnitude reduced further to 0.03)
gp_empty <- gp_init(
  # A squared exponential (aka Gaussian aka RBF) kernel
  cfs = cf_sexp(
    vars = NULL,
    lscale = 2,
    magn = 0.03,
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),  
  
  # Assume Gaussian distributed errors
  lik = lik_gaussian(
    sigma = 0.01, 
    prior_sigma = prior_logunif()
  ), 
  
  # Use the full covariance (i.e., do not approximate)
  method = method_full() 
)

# Fit without optimizing the hyperparameters
gp_raw_ell2sigma003 <- gp_fit(gp_empty, x, y)

Plot the model.

# compute the predictive mean and variance in a grid of points
pred_ell2sigma003 <- gp_pred(gp_raw_ell2sigma003, xt, var = TRUE)  # TRUE, not T

# visualize: overlay all three magnitude fits (0.03 purple, 0.1 orange, 6 gray)
mu_ell2sigma003 <- pred_ell2sigma003$mean
lb_ell2sigma003 <- pred_ell2sigma003$mean - 2 * sqrt(pred_ell2sigma003$var)
ub_ell2sigma003 <- pred_ell2sigma003$mean + 2 * sqrt(pred_ell2sigma003$var)

ggplot() + 
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma003, ymax = ub_ell2sigma003), fill = 'purple', alpha = 0.3) +
  geom_line(aes(x = xt, y = mu_ell2sigma003), linewidth = 0.5, color = "purple") +
  # BUG FIX: ymax previously reused lb_ell2sigma01, collapsing the ribbon
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma01, ymax = ub_ell2sigma01), fill = 'orange', alpha = 0.3) +
  geom_line(aes(x = xt, y = mu_ell2sigma01), linewidth = 0.5, color = "orange") +
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6), fill = 'lightgray', alpha = 0.6) +
  geom_line(aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')